******************************************************************************************
* Do-file name:	an_desc_stats_01.do                   
* Task:         create descriptive statistics   
* Last change:  03.04.2023          
* Notes:    
/*
This file contains the code used to generate the results presented in the following tables and figures:
- Table A.3: tab_means_native_czech.txt
*/
******************************************************************************************



******************************************************************************************
*** program setup
******************************************************************************************

version 14.2
clear all
macro drop _all
set linesize 90
set more off
* set trace on
discard
set seed 123456789
*set matsize 2000



******************************************************************************************
*** load working dataset
******************************************************************************************

use "data/desc_stats.dta", clear


******************************************************************************************
*** create new variables
******************************************************************************************

*** create foreigner dummy (all foreigner whenever they arrived in Germany)
gen 	foreign = .
replace foreign = 1  if nation_gr == 2 | nation_gr == 3
replace foreign = 0  if nation_gr != 2 & nation_gr != 3 & nation_gr != .

*** create foreign + czech dummy 
gen 	foreign_czech = foreign


*** drop vars already created
drop non_emp_nat

** labor market status in 1990
bys vsnr_ano: gen  status_1990_h = status  if year == 1990
bys vsnr_ano: egen status_1990   = max(status_1990_h)
drop status_1990_h


sort vsnr_ano year
tsset vsnr_ano year

** labor market status
foreach x of numlist 1/5 {
local y = 1990 + `x'
gen status_`y' = F`x'.status  if year == 1990
	}

** municipality 
foreach x of numlist 1/5 {
local y = 1990 + `x'
gen ao_gem_imp_`y' = F`x'.ao_gem_imp  if year == 1990
	}
gen ao_gem_imp_1990 = ao_gem_imp  if year == 1990

** based on 3-digit occupation: afl abstract task index 
foreach x of numlist 1/5 {
local y = 1990 + `x'
gen bibbafl_abs_3d_`y' = F`x'.bibbafl_abs_3d  if year == 1990
	}
gen bibbafl_abs_3d_1990 = bibbafl_abs_3d  if year == 1990

* stayers in the same region
foreach y of numlist 1991/1995 {
gen d_bibbafl_abs_3d_`y'_90_2 = bibbafl_abs_3d_`y' - bibbafl_abs_3d_1990  if year == 1990 & nation_gr == 1 & status_1990 == 1 & status_`y' == 1 & ao_gem_imp_1990 == ao_gem_imp_`y' & ao_gem_imp_1990 != . & ao_gem_imp_`y' != . & (border_imp_13 == 1 | control_imp == 1) & age >= 16 & age <= 65 & weight_fte != .
	}

save "data/desc_stats_h1.dta", replace


******************************************************************************************
*** create stats on upgrading using abstract intensity measure & avg wage, border & control region
******************************************************************************************

*** Occup. abstract intensity:

** Occup. abstract intensity: border region, 1990-1995
foreach g of numlist 1 2 {
foreach x of numlist 0/5 {
display "---------------------------------------------------------------------------------------------------"
display "Occup. abstract intensity: nation_gr=`g', border region, year: 199`x'"
sum bibbafl_abs_3d [iw=weight_fte]  if border_imp_13 == 1 & status == 1 & nation_gr == `g' & year == 199`x'
sum bibbafl_abs_2d [iw=weight_fte]  if border_imp_13 == 1 & status == 1 & nation_gr == `g' & year == 199`x'
display "---------------------------------------------------------------------------------------------------"
}
}

** Occup. abstract intensity: control region, 1990-1995
foreach g of numlist 1 2 {
foreach x of numlist 0/5 {
display "---------------------------------------------------------------------------------------------------"
display "Occup. abstract intensity: nation_gr=`g', control region, year: 199`x'"
sum bibbafl_abs_3d [iw=weight_fte]  if control_imp == 1 & status == 1 & nation_gr == `g' & year == 199`x'
sum bibbafl_abs_2d [iw=weight_fte]  if control_imp == 1 & status == 1 & nation_gr == `g' & year == 199`x'
display "---------------------------------------------------------------------------------------------------"
}
}

** Occup. abstract intensity: border region, 1990-1995, condi. on occ. task in 1990
foreach g of numlist 1 2 {
foreach z of numlist 1 2 {
foreach x of numlist 0/5 {
display "---------------------------------------------------------------------------------------------------"
display "Occup. abstract intensity: nation_gr=`g', task group in 90:`z', border region, year: 199`x'"
sum bibbafl_abs_3d [iw=weight_fte]  if border_imp_13 == 1 & status == 1 & nation_gr == `g' & year == 199`x' & task_3dig_2_1990 == `z' & status_1990 == 1
sum bibbafl_abs_2d [iw=weight_fte]  if border_imp_13 == 1 & status == 1 & nation_gr == `g' & year == 199`x' & task_3dig_2_1990 == `z' & status_1990 == 1
display "---------------------------------------------------------------------------------------------------"
}
}
}

** Occup. abstract intensity: control region, 1990-1995, condi. on occ. task in 1990
foreach g of numlist 1 2 {
foreach z of numlist 1 2 {
foreach x of numlist 0/5 {
display "---------------------------------------------------------------------------------------------------"
display "Occup. abstract intensity: nation_gr=`g', task group in 90:`z', control region, year: 199`x'"
sum bibbafl_abs_3d [iw=weight_fte]  if control_imp == 1 & status == 1 & nation_gr == `g' & year == 199`x' & task_3dig_2_1990 == `z' & status_1990 == 1
sum bibbafl_abs_2d [iw=weight_fte]  if control_imp == 1 & status == 1 & nation_gr == `g' & year == 199`x' & task_3dig_2_1990 == `z' & status_1990 == 1
display "---------------------------------------------------------------------------------------------------"
}
}
}

** Change in occup. abstract intensity: border/control regions, 1990-1995, stayers in the region
foreach x of numlist 1/5 {
display "---------------------------------------------------------------------------------------------------"
display "Change in occup. abstract intensity: native stayer in the region, border/control region, year: 199`x'"
display "border region"
sum d_bibbafl_abs_3d_199`x'_90_2  if border_imp_13 == 1
display "control region"
sum d_bibbafl_abs_3d_199`x'_90_2  if control_imp == 1
display "---------------------------------------------------------------------------------------------------"
}

** Change in occup. abstract intensity: border/control regions, 1990-1995, stayers in the region, condi. on occ. task in 1990
foreach z of numlist 1 2 {
foreach x of numlist 1/5 {
display "---------------------------------------------------------------------------------------------------"
display "Change in occup. abstract intensity: native stayer in the region, task group in 90:`z', border/control region, year: 199`x'"
display "border region"
sum d_bibbafl_abs_3d_199`x'_90_2  if border_imp_13 == 1 & task_3dig_2_1990 == `z'
display "control region"
sum d_bibbafl_abs_3d_199`x'_90_2  if control_imp == 1   & task_3dig_2_1990 == `z'
display "---------------------------------------------------------------------------------------------------"
}
}


******************************************************************************************
*** create stats on Czech and non-czech
******************************************************************************************

use "data/desc_stats_h1.dta", clear

*** restrict sample
keep if year == 1990 | year == 1992
keep if border_imp_13 == 1
drop if weight_fte == . & status == 1

*** create new variables for non-employed workers
gen 	non_emp_tot	= 1  if emp_tot != 1
replace non_emp_tot = 0  if emp_tot == 1
gen 	non_emp_nat = 1  if emp_tot != 1 & native == 1
replace non_emp_nat = 0  if emp_tot == 1 & native == 1

*** create weight for fte including non-employed individulas
gen 	weight_fte_2 = weight_fte
replace weight_fte_2 = 1  if non_emp_tot == 1

*** all individuals by 2 education and 3 age groups
* low educated (edu2=1)
gen 	all_edu21_age3 = 1  if imp_edu_2 == 1 & (age >= 16 & age <= 29)
replace all_edu21_age3 = 2  if imp_edu_2 == 1 & (age >= 30 & age <= 49)
replace all_edu21_age3 = 3  if imp_edu_2 == 1 & (age >= 50 & age <= 65)
tab all_edu21_age3, gen(all_edu21_age3)

* high educated (edu2=2)
gen 	all_edu22_age3 = 1  if imp_edu_2 == 2 & (age >= 16 & age <= 29)
replace all_edu22_age3 = 2  if imp_edu_2 == 2 & (age >= 30 & age <= 49)
replace all_edu22_age3 = 3  if imp_edu_2 == 2 & (age >= 50 & age <= 65)
tab all_edu22_age3, gen(all_edu22_age3)

*** create number of workers: employed czech, employed native and non-employed native
egen num_employ_czech_92    = total(emp_tot)     if year == 1992 & czech  == 1
egen num_employ_native_90   = total(emp_nat)  	 if year == 1990 & native == 1
egen num_unemploy_native_90 = total(non_emp_nat) if year == 1990 & native == 1

egen num_native_fte_90 = total(weight_fte)  if year == 1990 & native == 1
egen num_czech_fte_92  = total(weight_fte)  if year == 1992 & czech == 1

*** create apprentice dummies
replace emp_nat_app = 0	 		if emp_nat_app != 1 & emp_nat == 1
replace emp_cze_app = 0	 		if emp_cze_app != 1 & emp_cze == 1
replace emp_nat_app_first = 0	if emp_nat_app != 1 & emp_nat == 1
replace emp_cze_app_first = 0	if emp_cze_app != 1 & emp_cze == 1

*** create more industry groups
gen 	ind28_construction = (industry == 17)
replace ind28_construction = .  if industry == .
label variable ind28_construction "1 if construction sector (based on industry, 28 industries classif.)"

gen 	ind28_hotel_restau = (industry == 21)
replace ind28_hotel_restau = .  if industry == .
label variable ind28_hotel_restau "1 if hotel/restaurant sector (based on industry, 28 industries classif.)"

gen 	ind28_other = (industry == 18 | industry == 19 | industry == 20 | industry == 23 | industry == 25 | industry == 26)
replace ind28_other = .  if industry == .
label variable ind28_other "1 if other sector (based on industry, 28 industries classif.)"


*** create descriptive stats
estpost summarize imp_edu_2_low imp_edu_2_high task_3dig_2_man task_3dig_2_abst ///
				  age_3_1 age_3_2 age_3_3 ///
				  bibbafl_abs_3d   ///
			      female impy_2 ind28_* num_native_fte_90 num_employ_native_90  if year == 1990 & native == 1 & emp_nat == 1 [iw=weight_fte_2]
eststo means_emp_native_90


estpost summarize imp_edu_2_low imp_edu_2_high task_3dig_2_man task_3dig_2_abst /// 
				  age_3_1 age_3_2 age_3_3 ///
			      female impy_2 ind28_* num_unemploy_native_90  if year == 1990 & native == 1 & emp_tot != 1 [iw=weight_fte_2]
eststo means_unemp_native_90


estpost summarize imp_edu_2_low imp_edu_2_high task_3dig_2_man task_3dig_2_abst /// 
				  age_3_1 age_3_2 age_3_3 ///
				  bibbafl_abs_3d  ///
			      female impy_2 ind28_* num_czech_fte_92 num_employ_czech_92  if year == 1992 & czech == 1 & emp_tot == 1 [iw=weight_fte_2]
eststo means_czech_92


*** create table
#delimit ;
global estout_means2 "cells("mean(fmt(%12.3fc) vacant(-)) sd(fmt(%12.3fc) vacant(-))") 
rename(num_employ_native_90 num_indiv num_unemploy_native_90 num_indiv num_employ_czech_92 num_indiv 
num_native_fte_90 num_fte_worker num_czech_fte_92 num_fte_worker)
refcat(imp_edu_2_low "Skill distribution" age_3_1 "Age distribution" ind28_tradable "Industries", label(" "))
varlabels(imp_edu_2_low "Low (no postsecondary education)" imp_edu_2_high "High (postsecondary education)"
task_3dig_2_man "Manual/routine (3d)" task_3dig_2_abst "Abstract (3d)"
age_3_1 "Below 30" age_3_2 "30 to 49" age_3_3 "50 and above" female "Share females" bibbafl_abs_3d "Avg. occp. abstract intensity (3d)"  
impy_2 "Mean log wage (imputed)" num_fte_worker "No. full-time equiv. workers" num_indiv "No. individuals"
ind28_tradable "Tradable sector" ind28_publicsector "Public sector" ind28_construction "Construction sector" 
ind28_hotel_restau "Hotel & restaurant sector" ind28_other "Other sectors")
varwidth(38) prehead(@title) posthead() postfoot(@note) nonumbers mlabels("Natives Employed (1990)" "Natives Non-Employed (1990)" "Czechs Employed (1992)") 
collabels(Mean StdD) style(tab)";
#delimit cr


** Table A.3: Characteristics of Natives and Czech Nationals in the Border Region
estout  means_emp_native_90  means_unemp_native_90  means_czech_92 ///
using "tables/tab_means_native_czech.txt", $estout_means2 replace ///	
title(Characteristics of Natives and Czechs in the Border Region) ///
note(Notes: The table compares average characteristics of natives (employed and non-employed) ///
and Czech workers in the border region in 1992 and 1990, respectivley. ///
Data Source: German Social Security Records, border districts and matched control districts, 1986-1992.)



erase "data\agg_level/desc_stats_h1.dta"



******************************************************************************************
*** end
******************************************************************************************

exit


*========================================================================================*
Comments:
- unique identifier: vsnr_ano year
